####################################################################
#***2019 analysis
## Packages
# Install any required packages that are missing, then attach all of them.
need <- c('tidyverse','readstata13','lfe','glue','rdrobust', 'stargazer','arm', 'broom', 'ggplot2', 'dotwhisker', 'gridExtra', 'USAboundaries', 'rgdal', 'rgeos', 'sf')
# system.file() returns "" for a package that is not installed; this avoids
# scanning every installed package just to test for a handful of names.
have <- vapply(
  need,
  function(pkg) nzchar(system.file(package = pkg)),
  logical(1),
  USE.NAMES = FALSE
)
if (any(!have)) install.packages(need[!have])
# Attach every package; the list returned by lapply() is discarded.
invisible(lapply(need, library, character.only = TRUE))

# Set the working directory to the parent of the folder that holds this
# script, so that the relative data and output paths used below resolve.
# Inside RStudio the script path comes from the active editor; when the file
# is run with `Rscript` it is taken from the `--file=` command-line argument.
script_path <- if (requireNamespace("rstudioapi", quietly = TRUE) &&
                   rstudioapi::isAvailable()) {
  rstudioapi::getSourceEditorContext()$path
} else {
  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  if (length(file_arg) == 0L) {
    stop(
      "Cannot determine the script location; run it from RStudio or with Rscript.",
      call. = FALSE
    )
  }
  sub("^--file=", "", file_arg[1L])
}
setwd(dirname(script_path))
# Clears every object in the workspace, including the temporaries created above.
rm(list = ls())
# Step up one level from the script folder.
setwd("../")


#install.packages("maps")
library(maps)
#install.packages("maptools")
library(maptools)
#install.packages("rgdal")
library(rgdal)
#install.packages("rgeos")
library(rgeos)

#install.packages("mapproj")
library(mapproj)
library(USAboundaries)
# Install the USAboundariesData companion package only when it is missing,
# instead of rebuilding it from source (and needing network access) on every run.
if (!requireNamespace("USAboundariesData", quietly = TRUE)) {
  install.packages("USAboundariesData", repos = "https://ropensci.r-universe.dev", type = "source")
}


# Load the prepared data; `rd.data`, used throughout the rest of the script,
# comes from this file.
load("5prepdata/final_rd_data.RData")

#### clean up rd data ####
# Derive `district_id` from `district.x` and build `district_fips`, the same
# code left-padded with zeros to three characters (codes already three or more
# characters long are kept unchanged).
# An earlier pass computed both columns from a punctuation-stripped
# `district_id`, but its results were immediately overwritten by this
# derivation, so only this one is kept; the final columns are the same.
rd.data <- rd.data %>%
  ungroup() %>%
  mutate(
    district_id = as.character(district.x),
    district_fips = ifelse(
      str_length(district_id) == 1,
      paste0("00", district_id),
      ifelse(
        str_length(district_id) == 2,
        paste0("0", district_id),
        district_id
      )
    )
  )


## Build a bandwidth-limited copy of the data for descriptive purposes.
# Remove any existing versions of the derived columns before recomputing them.
rd.data$cutoff <- NULL
rd.data$bandwidth <- NULL
rd.data$kw <- NULL

# Bandwidth chosen by rdbwselect(); the first entry of its `bws` result is
# stored on every row.
rd.data$bandwidth <- rdbwselect(
  rd.data$ever_winhouse,
  rd.data$victory_marg,
  p = 1,
  c = 0,
  kernel = "tri",
  bwselect = "mserd"
)$bws[1]

# Weight = 1 - |distance from the cutoff| / bandwidth, set to zero for
# observations whose margin lies outside the bandwidth on either side.
rd.data <- rd.data %>%
  mutate(
    cutoff = 0,
    kw = 1 - abs(cutoff - victory_marg) / bandwidth,
    kw = replace(
      kw,
      which(victory_marg > cutoff + bandwidth | victory_marg < cutoff - bandwidth),
      0
    )
  )

# Keep only the observations that carry positive weight.
rd.bw <- filter(rd.data, kw > 0)

# Load the `state.fips` lookup table.
data(state.fips)

# merge in state_fips:
# Attach full state names through the postal abbreviation and keep only the
# first lookup row for each state name.
state.fips <- left_join(
  state.fips,
  data.frame(state.name, abb = state.abb),
  by = "abb"
) %>%
  group_by(state.name) %>%
  filter(row_number() == 1)

# Shared preparation for the full and the bandwidth-limited samples:
#   1. rename `state_fips` to `state.name` so it can key the lookup join
#      (NOTE(review): this presumes `state_fips` holds state names - confirm);
#   2. attach the lookup columns and drop rows with no matching FIPS code;
#   3. zero-pad the state FIPS code to two characters (always character, even
#      when no single-digit code is present);
#   4. build `id` as state FIPS followed by `district_fips`;
#   5. keep years 2001 through 2010.
add_district_id <- function(df, state_lookup) {
  df %>%
    rename(state.name = state_fips) %>%
    left_join(state_lookup, by = "state.name") %>%
    filter(!is.na(fips)) %>%
    ungroup() %>%
    mutate(
      fips = ifelse(str_length(fips) == 1, paste0("0", fips), as.character(fips)),
      id = paste0(fips, district_fips)
    ) %>%
    filter(year >= 2001 & year < 2011)
}

rd.data <- add_district_id(rd.data, state.fips)
rd.bw <- add_district_id(rd.bw, state.fips)


# Number of rows per district id in the bandwidth-limited sample.
rd.bw.merge <- rd.bw %>%
  ungroup() %>%
  group_by(id) %>%
  summarize(total = n())

# Number of rows per district id in the full sample, joined to the
# bandwidth-limited counts. The join key is spelled out so that it cannot
# silently change if the two tables ever share another column name.
rd.merge <- rd.data %>%
  ungroup() %>%
  group_by(id) %>%
  summarize(total_sample = n()) %>%
  left_join(rd.bw.merge, by = "id") %>%
  mutate(
    # Districts absent from the bandwidth-limited sample count as zero.
    total = replace(total, is.na(total), 0),
    # Share of each district's rows that fall inside the bandwidth.
    prop = total / total_sample
  )

# Massachusetts and Vermont shape files are malformed
#test <- data.frame(state = sld2@data[["STATE"]], dist = sld2@data[["SLDL"]])


#### read in shape files ####

# Read the House_2000s shapefile, using GEO_ID as the polygon identifier, and
# copy the resulting row names back into the GEO_ID column.
sld2 <- readShapeSpatial("1data/shapefiles/House_2000s", IDvar = "GEO_ID")
sld2@data$GEO_ID <- rownames(sld2@data)

# State outlines used as overlays in the map below.
states_map <- map_data("state")

# Flatten the polygons to a data frame, keep points with longitude between
# -150 and 0 and latitude below 50, and attach the per-district counts.
sld2.df <- fortify(sld2) %>%
  filter(long < 0, long > -150, lat < 50) %>%
  left_join(rd.merge, by = "id") %>%
  mutate(
    # Districts without a match get a count of zero.
    total = replace(total, is.na(total), 0),
    # Count capped into four categories: 0, 1, 2, and 3 for "more than two".
    total2 = case_when(
      total > 2 ~ 3,
      total == 2 ~ 2,
      total == 1 ~ 1,
      TRUE ~ 0
    ),
    # Districts without a match get a proportion of zero.
    prop = replace(prop, is.na(prop), 0)
  )

# ggplot(sld2.df, aes(x=long, y=lat, group=group))+
#   geom_polygon(fill="white", color="black")+
#   coord_map()

# state leg map
# Districts are shaded from white to red by `prop`. Five states (Nebraska,
# Minnesota, Louisiana, Vermont, Massachusetts) are painted light grey on top
# of the districts, and state outlines are drawn over everything.
state_leg_map <- ggplot(sld2.df, aes(x=long, y=lat, group=group))+
  geom_polygon(aes(fill=prop))+
  scale_fill_gradient(low = "white", high="red") +
  coord_map() +
  geom_polygon(data=states_map %>% filter(region %in% c("nebraska", "minnesota", "louisiana", "vermont", "massachusetts")), aes(x=long, y=lat), fill="#eeeeee" , color=NA,size=.5) +
  geom_polygon(data=states_map, aes(x=long, y=lat), colour='gray20', fill=NA,size=.2) +
  theme_bw() +
  theme(axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.text=element_blank(),
        axis.title=element_blank()) +
  theme(panel.grid.minor=element_blank(),
        panel.grid.major=element_blank(),
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5)) + theme(legend.position="none")

# Display the map, then save exactly this plot object. Passing `plot`
# explicitly avoids depending on whichever plot was created last, and
# `filename` is spelled out rather than partially matched from `file`.
print(state_leg_map)
ggsave(
  filename = "7tex/manuscript/tables/sourcefiles/Appendix Figure 4a.pdf",
  plot = state_leg_map,
  units = "in",
  width = 6,
  height = 4
)


# congress map:
# first create PVI

# Read one year's PVI file from 1data/pvi/<yr>.csv and tag it with the year.
#   yr           - year used both in the file name and in the `year` column.
#   trim_cols    - if TRUE, keep only the columns from `State` through `PVI`.
#   district_chr - if TRUE, convert `District` to character.
read_pvi <- function(yr, trim_cols = FALSE, district_chr = TRUE) {
  dat <- read.csv(
    file.path("1data/pvi", paste0(yr, ".csv")),
    stringsAsFactors = FALSE
  )
  if (trim_cols) {
    dat <- dplyr::select(dat, State:PVI)
  }
  dat <- mutate(dat, year = yr)
  if (district_chr) {
    dat <- mutate(dat, District = as.character(District))
  }
  dat
}

# Stack all available years. The 2006 and 2010 files are trimmed to
# State:PVI; the 2012 and 2014 files keep `District` as read.
pvi <- bind_rows(
  read_pvi(2000),
  read_pvi(2002),
  read_pvi(2004),
  read_pvi(2006, trim_cols = TRUE),
  read_pvi(2010, trim_cols = TRUE),
  read_pvi(2012, district_chr = FALSE),
  read_pvi(2014, district_chr = FALSE)
)

# `party_adv` is "R" when the PVI string contains an "R" and "D" otherwise;
# `adv` is the PVI string with letters and "+" removed.
pvi <- pvi %>%
  mutate(party_adv = ifelse(str_detect(PVI, "R"), "R", "D")) %>%
  mutate(adv = str_replace_all(PVI, "[:alpha:]|[+]", ""))

# A PVI with nothing left after stripping is treated as zero.
pvi$adv[pvi$adv == ""] <- 0

# Congressional district boundaries.
us_cong <- us_congressional(resolution = "high")

# Average the PVI magnitude per state and district.
# NOTE(review): `pvi` is assembled from the 2000, 2002, 2004, 2006, 2010, 2012
# and 2014 files only, so the 2008 in the year filter below matches no rows
# and the average is effectively the 2004 value - confirm which years were
# intended.
pvi <- rename(pvi, state_name = State, cd115fp = District) %>%
  ungroup() %>%
  # Left-pad single-character district codes to two characters.
  mutate(cd115fp = ifelse(str_length(cd115fp) == 1, paste0("0", cd115fp), cd115fp)) %>%
  filter(year %in% c(2004, 2008)) %>%
  mutate(adv = as.numeric(adv)) %>%
  group_by(state_name, cd115fp) %>%
  summarize(adv = mean(adv, na.rm = TRUE)) %>%
  # Drop the grouping that summarize() leaves attached.
  ungroup() %>%
  dplyr::select(state_name, cd115fp, adv)

# District code "99" is recoded to "00" before joining to the boundaries.
pvi$cd115fp[pvi$cd115fp == "99"] <- "00"

#library(sf)
us_cong <- sf::st_as_sf(us_cong)

us_cong <- left_join(us_cong, pvi, by = c("state_name", "cd116fp" = "cd115fp"))

# Districts with no PVI match get an advantage of zero, which places them in
# the top `comp` bin below.
us_cong$adv <- as.numeric(us_cong$adv)
us_cong$adv[is.na(us_cong$adv)] <- 0

# Bin the advantage into a 1-7 score: 7 for adv <= 2.5, then 6, 5, 4, 3, 2
# for the right-closed intervals ending at 5, 7.5, 10, 15 and 20, and 1 for
# adv > 20.
comp_breaks <- c(2.5, 5, 7.5, 10, 15, 20)
us_cong$comp <- 7 - findInterval(us_cong$adv, comp_breaks, left.open = TRUE)


# Congressional districts shaded from white to blue by `comp`, restricted to
# the states present in the `state.fips` lookup.
congress_map <- ggplot(us_cong %>% filter(state_name %in% state.fips$state.name)) + 
  geom_sf(aes(geometry = geometry, fill=comp), colour='gray20', size=.2) +
  scale_fill_gradient(low = "white", high="dodgerblue") +
  coord_sf() +
  #geom_polygon(data=states_map %>% filter(region %in% c("nebraska", "minnesota", "louisiana", "vermont", "massachusetts")), aes(x=long, y=lat, group=group), fill="#eeeeee" , color=NA,size=.5) +
  theme_bw() +
  theme(axis.line=element_blank(),
        axis.ticks=element_blank(),
        axis.text=element_blank(),
        axis.title=element_blank()) +
  theme(panel.grid.minor=element_blank(),
        panel.grid.major=element_blank(),
        panel.border = element_blank(),
        plot.title = element_text(hjust = 0.5)) + theme(legend.position="none")

# Display the map, then save exactly this plot object. Passing `plot`
# explicitly avoids depending on whichever plot was created last, and
# `filename` is spelled out rather than partially matched from `file`.
print(congress_map)
ggsave(
  filename = "7tex/manuscript/tables/sourcefiles/Appendix Figure 4b.pdf",
  plot = congress_map,
  units = "in",
  width = 6,
  height = 4
)

# %>% filter(startsWith(id, "01")
